*******************************************************************************
* Session setup. The order of these commands is kept exactly as written.
* Remove data and other objects from memory so the run starts from a clean state.
clear all
* Raise the maximum number of variables allowed in a dataset to 30,000.
* NOTE(review): set maxvar is not available in every Stata edition -- confirm the edition used for replication.
set maxvar 30000
* Run the remaining commands under Stata 14 version control.
version 14
* Close any log that is still open; capture suppresses the error raised when no log is open.
capture log close
* Do not pause for --more-- when output fills the Results window.
set more off

****************************************************************************************************
* -----   Customize the paths and options:   ----- 
****************************************************************************************************
*cd  "Data\"

*
* Root folders for input and output files. Give them WITHOUT a trailing slash:
* every file name below is appended as "<folder>/<file>".
global MY_IN_PATH   "/Users/ben/Dropbox/RnD_tax_credit/ReStat_repl_package/Data"
global MY_OUT_PATH  "/Users/ben/Dropbox/RnD_tax_credit/ReStat_repl_package/Data"
*global MY_TEMP_PATH "..."

* Derived file names. The "/" separator is required because MY_OUT_PATH has no
* trailing slash; without it the names would expand to ".../Dataout.dta" and
* ".../Datacr_out.log". Values are quoted so folders containing spaces are kept intact.
global MY_OUT_FILE  "${MY_OUT_PATH}/out.dta"
global MY_LOG_FILE  "${MY_OUT_PATH}/cr_out.log"


*global MY_TEMP_PATH "..."

*global MY_OUT_FILE  ${MY_OUT_PATH}/out.dta
*global MY_LOG_FILE  ${MY_OUT_PATH}/cr_out.log


*log using "${MY_LOG_FILE}", text replace
****************************************************************************************************
* import science data
*************************************************************************************************

* Build a gvkey-year dataset holding the average of the variable max, and save it.
*   Inputs : text_measure_20231226.tsv  (delimited text, in MY_IN_PATH)
*            patents_for_emma.dta       (merged 1:1 on patent gvkey, in MY_IN_PATH)
*   Output : text_measures_20231226.dta (variables gvkey year avg_sim, in MY_IN_PATH)
* File names are quoted so that a customized MY_IN_PATH containing spaces still works.
import delimited "${MY_IN_PATH}/text_measure_20231226.tsv", clear

* Keep only observations found in both files. keep(match) replaces the separate
* "keep if _merge == 3" step and nogenerate avoids leaving _merge in the data.
merge 1:1 patent gvkey using "${MY_IN_PATH}/patents_for_emma.dta", keep(match) nogenerate

* Drop observations without a gvkey, then restrict the sample to 1977-2006.
* inrange() evaluates to 0 when ayear is missing, so missing years are removed
* by the same statement (no separate "drop if ayear == ." is needed).
drop if missing(gvkey)
keep if inrange(ayear, 1977, 2006)

* Mean of max within each gvkey-ayear cell, stored on every observation of the cell.
* gegen comes from the user-written gtools package, which must be installed
* (ssc install gtools) before this script is run.
gegen avg_sim = mean(max), by(gvkey ayear)

keep gvkey ayear avg_sim
rename ayear year

* avg_sim is constant within a gvkey-year cell, so all observations of a cell are
* exact copies at this point. Dropping exact duplicates therefore leaves one row
* per cell without needing the force option; isid stops the script if that
* expectation is ever violated instead of silently discarding information.
duplicates drop
isid gvkey year
summarize avg_sim

save "${MY_IN_PATH}/text_measures_20231226.dta", replace




**# Bookmark #1
